// bdlde_hexdecoder.h                                                 -*-C++-*-
#ifndef INCLUDED_BDLDE_HEXDECODER
#define INCLUDED_BDLDE_HEXDECODER

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide mechanism for decoding text from hexadecimal.
//
//@CLASSES:
//  bdlde::HexDecoder: mechanism for decoding text from hexadecimal
//
//@SEE_ALSO: bdlde_hexencoder
//
//@DESCRIPTION: This component provides a class, `bdlde::HexDecoder`, for
// decoding hexadecimal representation into plain text.
//
// `bdlde::HexEncoder` and `bdlde::HexDecoder` provide a pair of template
// functions (each parameterized separately on both input and output iterators)
// that can be used respectively to encode and to decode byte sequences of
// arbitrary length into and from the printable Hex representation.
//
// Each instance of either the encoder or decoder retains the state of the
// conversion from one supplied input to the next, enabling the processing of
// segmented input -- i.e., processing resumes where it left off with the next
// invocation on new input.  Instance methods are provided for both the
// encoder and decoder to (1) assert the end of input, (2) determine whether
// the input so far is currently acceptable, and (3) indicate whether a
// non-recoverable error has occurred.
//
///Hex Encoding
///------------
// The data stream is processed one byte at a time from left to right.  Each
// byte
// ```
//     7 6 5 4 3 2 1 0
//    +-+-+-+-+-+-+-+-+
//    |               |
//    +-+-+-+-+-+-+-+-+
//     `------v------'
//           Byte
// ```
// is segmented into two intermediate 4-bit quantities.
// ```
//     3 2 1 0 3 2 1 0
//    +-+-+-+-+-+-+-+-+
//    |       |       |
//    +-+-+-+-+-+-+-+-+
//     `--v--' `--v--'
//      char0   char1
// ```
// Each 4-bit quantity is in turn used as an index into the following character
// table to generate an 8-bit character.
// ```
//    =================
//    *  Hex Alphabet *
//    -----------------
//    Val Enc  Val Enc
//    --- ---  --- ---
//      0 '0'    8 '8'
//      1 '1'    9 '9'
//      2 '2'   10 'A'
//      3 '3'   11 'B'
//      4 '4'   12 'C'
//      5 '5'   13 'D'
//      6 '6'   14 'E'
//      7 '7'   15 'F'
//    =================
// ```
// Depending on its settings, the encoder represents values from 10 to 15 as
// uppercase (`A`-`F`) or lowercase (`a`-`f`) letters.
//
// Input values of increasing length along with their corresponding Hex
// encodings are illustrated below:
// ```
//       Data: /* nothing */
//   Encoding: /* nothing */
//
//       Data: "0"     (0011 0000)
//   Encoding: 30
//
//       Data: "01"    (0011 0000 0011 0001)
//   Encoding: 3031
//
//       Data: "01A"   (0011 0000 0011 0001 0100 0001)
//   Encoding: 303141
//
//       Data: "01A?"  (0011 0000 0011 0001 0100 0001 0011 1111)
//   Encoding: 3031413F
// ```
//
///Hex Decoding
///------------
// The data stream is processed two bytes at a time from left to right.  Each
// sequence of two 8-bit quantities
// ```
//     7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//    |               |               |
//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//     `------v------' `------v------'
//          Byte0           Byte1
// ```
// is segmented into four intermediate 4-bit quantities.
// ```
//     3 2 1 0 3 2 1 0 3 2 1 0 3 2 1 0
//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//    |       |       |       |       |
//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//     `--v--' `--v--' `--v--' `--v--'
//     chunk0   chunk1  chunk2  chunk3
// ```
// The second and fourth chunks are combined to get the resulting 8-bit
// character.
//
// Whitespace characters are ignored.  On any non-alphabet character the
// decoder reports an error.  In order for a Hex encoding to be valid the
// length of the input data (excluding any whitespace characters) must be a
// multiple of two.
//
// Input values of increasing length along with their corresponding decoded
// output are illustrated below.  The bit patterns show the 4-bit value
// contributed by each hex digit.  Note that an unpaired trailing digit
// produces no output until its partner is supplied, and that the digit pair
// `20` decodes to a space character, which is emitted like any other byte
// (only whitespace appearing in the *input* is skipped):
// ```
//       Data: /* nothing */
//    Decoded: /* nothing */
//
//       Data: "4"       (0000 0100)
//    Decoded: /* nothing */
//
//       Data: "41"      (0000 0100 0000 0001)
//    Decoded: "A"
//
//       Data: "412"     (0000 0100 0000 0001 0000 0010)
//    Decoded: "A"
//
//       Data: "4120"    (0000 0100 0000 0001 0000 0010 0000 0000)
//    Decoded: "A "
//
//       Data: "41203"   (0000 0100 0000 0001 0000 0010 0000 0000
//                        0000 0011)
//    Decoded: "A "
//
//       Data: "41203F"  (0000 0100 0000 0001 0000 0010 0000 0000
//                        0000 0011 0000 1111)
//    Decoded: "A ?"
// ```
//
///Usage
///-----
// This section illustrates intended use of this component.
//
///Example 1: Basic Usage of `bdlde::HexDecoder`
///- - - - - - - - - - - - - - - - - - - - - - -
// The following example shows how to use a `bdlde::HexDecoder` object to
// implement a function, `streamDecoder`, that reads hex representation from
// `bsl::istream`, decodes that text, and writes the decoded text to a
// `bsl::ostream`.  `streamDecoder` returns 0 on success and a negative value
// if the input data could not be successfully decoded or if there is an I/O
// error.
// ```
// /// Read the entire contents of the specified input stream `is`, convert
// /// the input hex encoding into plain text, and write the decoded text
// /// to the specified output stream `os`.  Return 0 on success, and a
// /// negative value otherwise.
// int streamDecoder(bsl::ostream& os, bsl::istream& is)
// {
//     enum {
//         SUCCESS      =  0,
//         DECODE_ERROR = -1,
//         IO_ERROR     = -2
//     };
// ```
// First we create a decoder object, create buffers for storing data, and
// start a loop that runs while the input stream contains some data:
// ```
//     bdlde::HexDecoder converter;
//
//     const int INBUFFER_SIZE  = 1 << 10;
//     const int OUTBUFFER_SIZE = 1 << 10;
//
//     char inputBuffer[INBUFFER_SIZE];
//     char outputBuffer[OUTBUFFER_SIZE];
//
//     char *output    = outputBuffer;
//     char *outputEnd = outputBuffer + sizeof outputBuffer;
//
//     while (is.good()) {  // input stream not exhausted
// ```
// On each iteration we read some data from the input stream:
// ```
//         is.read(inputBuffer, sizeof inputBuffer);
//
//         const char *input    = inputBuffer;
//         const char *inputEnd = input + is.gcount();
//
//         while (input < inputEnd) { // input encoding not complete
//
//             int numOut = 0;
//             int numIn  = 0;
// ```
// Convert obtained text using `bdlde::HexDecoder`:
// ```
//             int status = converter.convert(
//                                      output,
//                                      &numOut,
//                                      &numIn,
//                                      input,
//                                      inputEnd,
//                                      static_cast<int>(outputEnd - output));
//             if (status < 0) {
//                 return DECODE_ERROR;                              // RETURN
//             }
//
//             output += numOut;
//             input  += numIn;
// ```
// And write decoded text to the output stream:
// ```
//             if (output == outputEnd) {  // output buffer full; write data
//                 os.write(outputBuffer, sizeof outputBuffer);
//                 if (os.fail()) {
//                     return IO_ERROR;                              // RETURN
//                 }
//                 output = outputBuffer;
//             }
//         }
//     }
//
//     if (output > outputBuffer) {
//         os.write (outputBuffer, output - outputBuffer);
//     }
// ```
// Then we need to complete the work of our decoder:
// ```
//     int more = converter.endConvert();
//     if (more < 0) {
//         return DECODE_ERROR;                                      // RETURN
//     }
//
//     return is.eof() && os.good() ? SUCCESS : IO_ERROR;
// }
// ```
// Next, to demonstrate how our function works we need to create a stream with
// encoded data.  Assume that we have some character string, `BLOOMBERG_NEWS`,
// and a function, `streamEncoder` mirroring the work of the `streamDecoder`:
// ```
// bsl::istringstream inStream(bsl::string(BLOOMBERG_NEWS,
//                                         strlen(BLOOMBERG_NEWS)));
// bsl::stringstream  outStream;
// bsl::stringstream  backInStream;
//
// assert(0 == streamEncoder(outStream,    inStream));
// ```
// Now, we use our function to decode text:
// ```
// assert(0 == streamDecoder(backInStream, outStream));
// ```
// Finally, we observe that the output fully matches the original text:
// ```
// assert(0 == strcmp(BLOOMBERG_NEWS, backInStream.str().c_str()));
// ```

#include <bdlscm_version.h>

#include <bsls_assert.h>

#include <bsl_iterator.h>

namespace BloombergLP {
namespace bdlde {

/// This class implements a mechanism capable of converting data of
/// arbitrary length from its corresponding Hex representation.  The
/// conversion state is kept in the object between calls, so the input may
/// be supplied in several segments.  Objects of this class cannot be
/// copied or assigned (the copy operations are declared private and are
/// not implemented).
class HexDecoder {

    // PRIVATE TYPES

    /// Symbolic state values for the decoder.
    enum States {
        e_ERROR_STATE        = -1, // input is irreparably invalid
        e_INPUT_STATE        =  0, // general input state
        e_DONE_STATE         =  1  // any additional input is an error
    };

    // DATA
    int         d_state;         // current state of this object (one of the
                                 // `States` enumerators)
    char        d_firstDigit;    // first (left) hex digit of the pair being
                                 // decoded, or 0 if no digit is pending
    int         d_outputLength;  // total number of output characters
    const char *d_decodeTable_p; // character code table; `convert` indexes
                                 // it with a hex-digit character to obtain
                                 // that digit's contribution to the output

    // PRIVATE CLASS METHODS

    /// Return `true` if the specified `character` is whitespace (i.e.,
    /// space, tab, CR, NL, VT, or FF), and `false` otherwise.
    static bool isSpace(char character);

    /// Return `true` if the specified `character` is a hex digit (`0`-`9`,
    /// `A`-`F`, or `a`-`f`), and `false` otherwise.
    static bool isXdigit(char character);

    // NOT IMPLEMENTED
    HexDecoder(const HexDecoder&);
    HexDecoder& operator=(const HexDecoder&);

  public:
    // CREATORS

    /// Create a Hex decoder in the initial state.
    HexDecoder();

    /// Destroy this object.
    //! ~HexDecoder() = default;

    // MANIPULATORS

    /// Consume and decode the sequence of input characters starting at the
    /// specified `begin` position, up to but not including the specified
    /// `end` position, writing every decoded byte to the specified `out`
    /// with no limit on the amount of output.  Return 0 on success and a
    /// negative value otherwise.  This overload behaves like the
    /// six-argument `convert` called with a `maxNumOut` of -1, except that
    /// the output and input counts are not reported.
    template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
    int convert(OUTPUT_ITERATOR out,
                INPUT_ITERATOR  begin,
                INPUT_ITERATOR  end);

    /// Consume and decode the sequence of input characters starting at the
    /// specified `begin` position, up to but not including the specified
    /// `end` position.  Whitespace characters are skipped; every completed
    /// pair of hex digits produces one byte that is written to the
    /// specified `out`.  A first digit whose partner has not yet been
    /// supplied is retained by this object and completed by a later call.
    /// Optionally specify `maxNumOut`, the maximum number of bytes to
    /// write; a negative value (the default) means no limit.  Once
    /// `maxNumOut` bytes have been written no further input is consumed,
    /// and if `maxNumOut` is 0 nothing is consumed at all.  Load into the
    /// specified `numOut` and `numIn` the number of output bytes produced
    /// and input characters consumed, respectively; both addresses must be
    /// non-null (this is checked with `BSLS_ASSERT`).  Return 0 on
    /// success, -1 if this decoder was already in the error state or a
    /// character that is neither whitespace nor a hex digit is encountered
    /// (that character is included in `numIn`), and -2 if this method is
    /// called after `endConvert` without an intervening `reset`.  Every
    /// non-zero return leaves this decoder in the error state.
    template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
    int convert(OUTPUT_ITERATOR  out,
                int             *numOut,
                int             *numIn,
                INPUT_ITERATOR   begin,
                INPUT_ITERATOR   end,
                int              maxNumOut = -1);

    /// Terminate decoding for this decoder.  Return 0 on success, and a
    /// negative value otherwise.
    int endConvert();

    /// Reset this decoder to its initial state (i.e., as if no input had
    /// been consumed).
    void reset();

    // ACCESSORS

    /// Return `true` if the input read so far by this decoder is considered
    /// syntactically complete and all resulting output has been emitted;
    /// return `false` otherwise.  Note that there must not be any
    /// unprocessed characters accumulated in the input buffer of this
    /// decoder (i.e., no first hex digit may be waiting for its partner).
    bool isAcceptable() const;

    /// Return `true` if this decoder is in the done state (i.e.,
    /// `endConvert` has been called and any additional input will result in
    /// an error), and if there is no pending output; return `false`
    /// otherwise.
    bool isDone() const;

    /// Return `true` if this decoder has encountered an irrecoverable error
    /// and `false` otherwise.  An irrecoverable error is one for which
    /// there is no subsequent possibility of achieving an "acceptable"
    /// result (as defined by the `isAcceptable` method).
    bool isError() const;

    /// Return `true` if this decoder is in the initial state (i.e., as if
    /// no input had been consumed) and `false` otherwise.
    bool isInitialState() const;

    /// Return `true` if the input to this decoder is maximal (i.e., the
    /// input contains an end-of-input sentinel, signaling that no further
    /// input should be expected).  *Always* returns `false` for Hex
    /// decoders since the encoding scheme does not specify an end-of-input
    /// sentinel.
    bool isMaximal() const;

    /// Return the total length of the output emitted by this decoder
    /// (possibly after several calls to the `convert` method) since its
    /// initial construction or the latest `reset`.
    // NOTE(review): the previous wording also mentioned an `input` method;
    // no such method is declared in this class -- confirm and drop.
    int outputLength() const;
};

// ============================================================================
//                             INLINE DEFINITIONS
// ============================================================================

// PRIVATE CLASS METHODS
inline
bool HexDecoder::isSpace(char character)
{
    // Classify on the unsigned character code so that bytes above 0x7F are
    // never treated as negative values.
    const unsigned char code = static_cast<unsigned char>(character);

    // TAB (0x09), NL (0x0A), VT (0x0B), FF (0x0C), and CR (0x0D) occupy one
    // contiguous range of codes; the space character (0x20) stands alone.
    // Every other code, including all codes above 0x7F, is not whitespace.
    const bool isControlWhitespace = 0x09 <= code && code <= 0x0D;
    const bool isBlank             = 0x20 == code;

    return isControlWhitespace || isBlank;
}

inline
bool HexDecoder::isXdigit(char character)
{
    // Classify on the unsigned character code so that bytes above 0x7F are
    // never treated as negative values.
    const unsigned char code = static_cast<unsigned char>(character);

    // The three accepted ranges of character codes.  Every other code,
    // including all codes above 0x7F, is not a hex digit.
    const bool isDecimalDigit = 0x30 <= code && code <= 0x39;  // '0' - '9'
    const bool isUpperLetter  = 0x41 <= code && code <= 0x46;  // 'A' - 'F'
    const bool isLowerLetter  = 0x61 <= code && code <= 0x66;  // 'a' - 'f'

    return isDecimalDigit || isUpperLetter || isLowerLetter;
}

// MANIPULATORS
template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
int HexDecoder::convert(OUTPUT_ITERATOR out,
                        INPUT_ITERATOR  begin,
                        INPUT_ITERATOR  end)
{
    // Delegate to the full overload with no output limit.  That overload
    // requires non-null addresses for both counts, so supply scratch
    // variables whose final values are simply discarded.
    const int k_NO_OUTPUT_LIMIT = -1;

    int ignoredNumOut = 0;
    int ignoredNumIn  = 0;

    const int status = convert(out,
                               &ignoredNumOut,
                               &ignoredNumIn,
                               begin,
                               end,
                               k_NO_OUTPUT_LIMIT);
    return status;
}

template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
int HexDecoder::convert(OUTPUT_ITERATOR  out,
                        int             *numOut,
                        int             *numIn,
                        INPUT_ITERATOR   begin,
                        INPUT_ITERATOR   end,
                        int              maxNumOut)
{
    // Decode the hex text in the range `[begin, end)`, writing one byte to
    // `out` for every completed pair of hex digits and skipping whitespace,
    // until the input is exhausted or `maxNumOut` bytes have been written (a
    // negative `maxNumOut` never matches the count, i.e., means "no limit").
    // Load the number of bytes written into `numOut` and the number of input
    // characters consumed into `numIn`.  Return 0 on success, -1 if this
    // decoder was already in the error state or an invalid character is met,
    // and -2 if `endConvert` was called earlier; every non-zero return
    // leaves this decoder in the error state.

    BSLS_ASSERT(numOut);
    BSLS_ASSERT(numIn);

    if (e_ERROR_STATE == d_state || e_DONE_STATE == d_state) {
        // Input after `endConvert` (-2) is distinguished from input supplied
        // to a decoder that has already failed (-1).

        int rv  = e_DONE_STATE == d_state ? -2 : -1;
        d_state = e_ERROR_STATE;
        *numOut = 0;
        *numIn  = 0;
        return rv;                                                    // RETURN
    }

    if (0 == maxNumOut) {
        // No room for output: consume nothing and leave the state untouched.

        *numOut = 0;
        *numIn = 0;
        return 0;                                                     // RETURN
    }

    // The consumed characters are counted as they are read instead of being
    // measured afterwards with `bsl::distance` from a saved copy of `begin`:
    // re-traversing the range is not valid for single-pass input iterators
    // (e.g., stream iterators) and costs a second linear pass for any other
    // non-random-access iterator.  The counter is unsigned so that a
    // pathologically long input wraps instead of overflowing a signed `int`.

    unsigned int numConsumed = 0;
    int          numEmitted  = 0;

    while (begin != end && numEmitted != maxNumOut) {
        const char digit = static_cast<char>(*begin);
        ++begin;
        ++numConsumed;

        if (!isSpace(digit)) {
            if (!isXdigit(digit)) {
                // Report what was produced before the offending character;
                // the offending character itself counts as consumed.

                *numOut = numEmitted;
                d_outputLength += numEmitted;
                *numIn = static_cast<int>(numConsumed);
                d_state = e_ERROR_STATE;
                return -1;                                            // RETURN
            }

            if (0 == d_firstDigit) {
                // First digit of a pair: remember it, possibly across calls.
                // No valid hex digit has the character code 0, so 0 can
                // serve as the "nothing pending" marker.

                d_firstDigit = digit;
            }
            else {
                // Second digit of a pair: the first digit supplies the high
                // four bits and this one the low four bits.

                char value = static_cast<char>(
                       (d_decodeTable_p[static_cast<int>(d_firstDigit)] << 4) |
                       (d_decodeTable_p[static_cast<int>(digit       )]));
                *out = value;

                ++out;
                ++numEmitted;
                d_firstDigit = 0;
            }
        }
    }

    *numOut = numEmitted;
    d_outputLength += numEmitted;
    *numIn = static_cast<int>(numConsumed);
    return 0;
}

inline
void HexDecoder::reset()
{
    // Forget any half-decoded pair and the running output count, then make
    // the decoder ready to accept input again.  The decode table is left
    // untouched.
    d_firstDigit   = 0;
    d_outputLength = 0;
    d_state        = e_INPUT_STATE;
}

// ACCESSORS
inline
bool HexDecoder::isAcceptable() const
{
    // A first digit that is still waiting for its partner means that the
    // input consumed so far cannot be a complete hex encoding.
    if (0 != d_firstDigit) {
        return false;                                                 // RETURN
    }

    // Otherwise the input is acceptable only in the general input state.
    return e_INPUT_STATE == d_state;
}

inline
bool HexDecoder::isDone() const
{
    // Only the state needs to be consulted here.
    const bool inDoneState = (e_DONE_STATE == d_state);

    return inDoneState;
}

inline
bool HexDecoder::isError() const
{
    // The error state is left only through `reset`.
    const bool inErrorState = (e_ERROR_STATE == d_state);

    return inErrorState;
}

inline
bool HexDecoder::isInitialState() const
{
    // The decoder looks freshly constructed (or reset) when it is accepting
    // input, has emitted nothing, and holds no half-decoded pair.
    const bool acceptingInput = (e_INPUT_STATE == d_state);
    const bool nothingEmitted = (0 == d_outputLength);
    const bool noPendingDigit = (0 == d_firstDigit);

    return acceptingInput && nothingEmitted && noPendingDigit;
}

inline
bool HexDecoder::isMaximal() const
{
    // The hex encoding defines no end-of-input sentinel, so no amount of
    // input can ever be recognized as maximal.
    const bool k_INPUT_IS_MAXIMAL = false;

    return k_INPUT_IS_MAXIMAL;
}

inline
int HexDecoder::outputLength() const
{
    // Running total maintained by `convert` and cleared by `reset`.
    const int totalEmitted = d_outputLength;

    return totalEmitted;
}

}  // close package namespace
}  // close enterprise namespace

#endif

// ----------------------------------------------------------------------------
// Copyright 2022 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------
